# Install packages if needed
# install.packages(c("knitr", "dplyr", "survival", "ggplot2", "here", "tibble"))
library(knitr)
library(dplyr)
library(survival)
library(ggplot2)
library(tibble)
# devtools::install_github("zabore/ezfun")
ezfun::set_ccf_palette("contrast")
# install.packages(c("lubridate", "ggsurvfit", "gtsummary", "tidycmprsk"))
library(lubridate)
library(ggsurvfit)
library(gtsummary)
library(tidycmprsk)
# devtools::install_github("zabore/condsurv")
library(condsurv)
library(psych)
library(kableExtra)


############
# Restore the objects saved in the exercise .RData file into the current
# environment; the rest of the script works on `dfex`.
# NOTE(review): this is an absolute, machine-specific path, so the script only
# runs as written on the original author's machine -- consider a
# project-relative path.
load("C:\\Users\\olive\\OneDrive\\Desktop\\Adv Suv Analysis\\df_exercise.RData")
head(dfex)

# Derive the event indicator from the censoring flag: rows with censor == 1
# get event = 0, every other non-missing value gets event = 1.
dfex[["event"]] <- ifelse(dfex$censor == 1, 0, 1)
head(dfex)

#***************************************************************
#### 1. Create Survival Object and fit using Kaplan-Meier method ####
#***************************************************************

# Build the survival response (follow-up time plus event indicator) with
# Surv() from the "survival" package.
surv_obj <- with(dfex, Surv(time, event))
# Sanity check: show the first ten entries to confirm that times and event
# flags were read as intended.
surv_obj[1:10]


#***************************************************************
#### 2. Kaplan-Meier and Survivor function with 95% CIs ####
#***************************************************************


# Kaplan-Meier fit for the whole cohort via survfit(). The right-hand side
# `~ 1` means "no grouping variable", i.e. one survival curve estimated from
# all observations. `km_fit` is reused for the tables and plots further down.
km_fit <- survfit(Surv(time, event) ~ 1, data = dfex)

# Full table of estimates, e.g. the risk set and number of events per row.
summary(km_fit)

# The same summary as a data frame; this form also exposes the number
# censored (n.censor) and the cumulative hazard (cumhaz).
km_table <- summary(km_fit, data.frame = TRUE)
print(km_table)

# Printing the fit itself gives the compact overview including the median
# survival time (the original analysis notes a median lifetime of 160 days).
km_fit


# Present the Kaplan-Meier estimates as a formatted HTML table.
library(knitr)
library(kableExtra)

# Summary of the fit as a data frame.
km_table <- summary(km_fit, data.frame = TRUE)

# Keep the columns of interest and give them display-friendly headers in a
# single step (selection order and label order correspond one-to-one).
km_table_nice <- setNames(
  km_table[c("time", "n.risk", "n.event", "n.censor", "surv", "cumhaz")],
  c("Time", "At Risk", "Events", "Censored", "Survival", "Cum. Hazard")
)

# Render as a styled HTML table, rounding to three digits.
km_table_nice %>%
  knitr::kable(
    format = "html",
    digits = 3,
    caption = "Kaplan-Meier Survival Table"
  ) %>%
  kableExtra::kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE,
    position = "center"
  )


# Quick base-graphics view of the Kaplan-Meier curve held in km_fit (the same
# estimates shown by the summaries above). conf.int = TRUE draws the 95%
# confidence limits around the curve.
plot(
  km_fit,
  conf.int = TRUE,
  xlab = "Days",
  ylab = "Overall survival probability"
)

# Customised Kaplan-Meier plot using ggsurvplot() from "survminer".
library(survminer)
# Survival curve with a custom confidence-band colour and line width.
gsurv <- ggsurvplot(
  km_fit,
  conf.int = TRUE,
  conf.int.fill = "#FFB347",  # fill colour of the confidence band (orange)
  conf.int.alpha = 0.4,       # transparency of the confidence band
  # Line colour (blue). This was "#2E9Fdfex", which is not a valid hex colour
  # ("e" and "x" are not hex digits and the string is too long); restored to
  # the six-digit code "#2E9FDF".
  palette = "#2E9FDF",
  size = 1.2                  # line thickness (survminer's own argument)
)
print(gsurv)


#***************************************************************
#### 3. Cumulative Hazard (Nelson-Aalen) with 95% CIs ####
#***************************************************************
# Cumulative hazard plot with a custom confidence-band colour and line width.
# NOTE(review): per the survminer documentation, fun = "cumhaz" applies the
# transformation f(y) = -log(y) to the Kaplan-Meier survival curve. That is
# close to, but not the same estimator as, the Nelson-Aalen cumulative hazard
# (available as km_fit$cumhaz) -- confirm which one is intended here.
gcumhaz <- ggsurvplot(
  km_fit,
  fun = "cumhaz",
  conf.int = TRUE,
  conf.int.fill = "#FFB347",  # fill colour of the confidence band (orange)
  conf.int.alpha = 0.4,       # transparency of the confidence band
  # Line colour (blue). This was "#2E9Fdfex", which is not a valid hex colour
  # ("e" and "x" are not hex digits and the string is too long); restored to
  # the six-digit code "#2E9FDF".
  palette = "#2E9FDF",
  size = 1.5                  # line thickness (survminer's own argument)
)
print(gcumhaz)

#***************************************************************
#### 4. Hazard function with Kernel-Smoothing ####
#***************************************************************


# Kernel-smoothed hazard estimates at three different bandwidths.
library(muhaz)
# bw.method = "global" uses the same bandwidth at every grid point. When
# bw.grid has a single component, no MSE minimisation is performed and the
# hazard is simply estimated at that bandwidth.
# https://www.rdocumentation.org/packages/muhaz/versions/1.2.6.4/topics/muhaz
haz_fit_15 <- muhaz(
  times = dfex$time,
  delta = dfex$event,
  bw.method = "global",
  bw.grid = 15
)
haz_fit_30 <- muhaz(
  times = dfex$time,
  delta = dfex$event,
  bw.method = "global",
  bw.grid = 30
)
haz_fit_90 <- muhaz(
  times = dfex$time,
  delta = dfex$event,
  bw.method = "global",
  bw.grid = 90
)
# Reshape the three hazard fits into one long data frame for ggplot.
library(dplyr)
library(tidyr)

# One data frame per bandwidth: estimation grid, hazard estimate, and a label
# identifying the bandwidth (used as the colour grouping in the plots).
dfex_15 <- data.frame(
  time = haz_fit_15[["est.grid"]],
  hazard = haz_fit_15[["haz.est"]],
  bandwidth = "(a) 15 days"
)
dfex_30 <- data.frame(
  time = haz_fit_30[["est.grid"]],
  hazard = haz_fit_30[["haz.est"]],
  bandwidth = "(b) 30 days"
)
dfex_90 <- data.frame(
  time = haz_fit_90[["est.grid"]],
  hazard = haz_fit_90[["haz.est"]],
  bandwidth = "(c) 90 days"
)
# Stack the three pieces row-wise.
haz_df <- bind_rows(dfex_15, dfex_30, dfex_90)

library(ggplot2)
# Overlay the kernel-smoothed hazard curves, one colour per bandwidth.
# `linewidth` replaces `size` for line geoms: `size` has been deprecated for
# lines since ggplot2 3.4.0 and triggers a deprecation warning.
ggplot(haz_df, aes(x = time, y = hazard, color = bandwidth)) +
  geom_line(linewidth = 1.2, alpha = 0.5) +
  labs(
    x = "Time (days)",
    y = "Hazard",
    title = "Kernel-Smoothed Hazard Function\n(bandwidth: 15, 30, 90 days)",
    color = "Bandwidth"
  ) +
  theme_minimal()

# Same hazard plot, using the discrete viridis colour scale instead of the
# default palette.
library(viridis)
# `linewidth` replaces `size` for line geoms: `size` has been deprecated for
# lines since ggplot2 3.4.0 and triggers a deprecation warning.
ggplot(haz_df, aes(x = time, y = hazard, color = bandwidth)) +
  geom_line(linewidth = 1.2, alpha = 0.70) +
  labs(
    x = "Time (days)",
    y = "Hazard",
    title = "Kernel-Smoothed Hazard Function\n(bandwidth: 15, 30, 90 days)",
    color = "Bandwidth"
  ) +
  scale_color_viridis_d(option = "D") +  # discrete viridis palette
  theme_minimal()

